1 Pre-processing

1.1 Load package

library("RColorBrewer")
library(Signac)
library(Seurat)
library(GenomicRanges)
library(future)
#library(SeuratWrappers)
library(harmony)
library(EnsDb.Hsapiens.v86)
library(stringr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(kableExtra)
library(tidyverse)

set.seed(123)

1.2 Parameters

# Paths
path_to_obj <- ("~/Documents/multiome_tonsil_Lucia/results/R_objects/13.tonsil_multiome_bcells_without_doublets_normalized.rds")

path_to_markers<-("~/Documents/multiome_tonsil_Lucia/results/tables/tonsil_markers_bcell_01.csv")

# Thresholds
max_doublet_score_rna <- 0.3

1.3 Load data

tonsil_wnn_bcell <- readRDS(path_to_obj)

tonsil_markers_01<-read_csv(path_to_markers)
## New names:
## * `` -> ...1
## Rows: 4679 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ...1, gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2 UMAP

 DimPlot(
    tonsil_wnn_bcell,
    group.by = "wsnn_res.0.1",
    reduction = "wnn.umap",
    pt.size = 0.1, label = T
  )

2.1 Get top n markers of each cluster

Resolution 0.1

top5_tonsil_markers_01<-tonsil_markers_01 %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top7_tonsil_markers_01<-tonsil_markers_01 %>% group_by(cluster) %>% top_n(n = 7, wt = avg_log2FC)
top10_tonsil_markers_01<-tonsil_markers_01 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
df_top5<-as.data.frame(top5_tonsil_markers_01)
kbl(df_top5,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
  kable_paper("striped", full_width = F)
Table 1: Table of the top 5 marker of each cluster resolution 0.005
…1 p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
ANK3 0 1.780595 0.549 0.239 0 0 ANK3
AL355076.2 0 1.717144 0.454 0.100 0 0 AL355076.2
SSPN 0 1.556627 0.360 0.109 0 0 SSPN
TBC1D9 0 1.386746 0.502 0.174 0 0 TBC1D9
ATXN1 0 1.373068 0.487 0.197 0 0 ATXN1
COL19A1 0 2.235729 0.844 0.288 0 1 COL19A1
STEAP1B 0 1.984336 0.568 0.169 0 1 STEAP1B
LIX1-AS1 0 1.642568 0.323 0.108 0 1 LIX1-AS1
ST6GALNAC3 0 1.466850 0.499 0.155 0 1 ST6GALNAC3
FCER2 0 1.444418 0.464 0.124 0 1 FCER2
HMGB2 0 2.937971 0.956 0.158 0 2 HMGB2
TUBA1B 0 2.856285 0.968 0.243 0 2 TUBA1B
H2AFZ 0 2.709261 0.969 0.254 0 2 H2AFZ
HIST1H4C 0 2.469442 0.854 0.328 0 2 HIST1H4C
TOP2A 0 2.453865 0.816 0.038 0 2 TOP2A
MAML31 0 2.641743 0.837 0.248 0 3 MAML3
AC023590.11 0 2.528443 0.986 0.297 0 3 AC023590.1
AC104170.1 0 2.441449 0.823 0.167 0 3 AC104170.1
RAPGEF51 0 2.225608 0.932 0.255 0 3 RAPGEF5
LHFPL21 0 2.153702 0.798 0.247 0 3 LHFPL2
FYB1 0 2.493123 0.881 0.027 0 4 FYB1
INPP4B 0 2.431992 0.823 0.029 0 4 INPP4B
THEMIS 0 2.035150 0.731 0.005 0 4 THEMIS
PRKCH 0 1.878844 0.885 0.138 0 4 PRKCH
IL7R 0 1.798465 0.680 0.012 0 4 IL7R
IGHGP1 0 5.711377 0.498 0.151 0 5 IGHGP
IGLC11 0 5.580136 0.829 0.473 0 5 IGLC1
IGKC1 0 5.655292 0.961 0.918 0 5 IGKC
IGHA11 0 6.011129 0.648 0.436 0 5 IGHA1
IGLC21 0 5.591442 0.894 0.761 0 5 IGLC2
df_top7<-as.data.frame(top7_tonsil_markers_01)
df_mark<-as.data.frame(tonsil_markers_01)
kbl(df_top7,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
  kable_paper("striped", full_width = F)
Table 2: Table of the top 5 marker of each cluster resolution 0.005
…1 p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
ANK3 0 1.780595 0.549 0.239 0 0 ANK3
AL355076.2 0 1.717144 0.454 0.100 0 0 AL355076.2
SSPN 0 1.556627 0.360 0.109 0 0 SSPN
TBC1D9 0 1.386746 0.502 0.174 0 0 TBC1D9
ATXN1 0 1.373068 0.487 0.197 0 0 ATXN1
ZDHHC14 0 1.361222 0.677 0.407 0 0 ZDHHC14
HIPK2 0 1.356845 0.330 0.162 0 0 HIPK2
COL19A1 0 2.235729 0.844 0.288 0 1 COL19A1
STEAP1B 0 1.984336 0.568 0.169 0 1 STEAP1B
LIX1-AS1 0 1.642568 0.323 0.108 0 1 LIX1-AS1
ST6GALNAC3 0 1.466850 0.499 0.155 0 1 ST6GALNAC3
FCER2 0 1.444418 0.464 0.124 0 1 FCER2
GAB1 0 1.435708 0.349 0.130 0 1 GAB1
PTPRK 0 1.411378 0.516 0.168 0 1 PTPRK
HMGB2 0 2.937971 0.956 0.158 0 2 HMGB2
TUBA1B 0 2.856285 0.968 0.243 0 2 TUBA1B
H2AFZ 0 2.709261 0.969 0.254 0 2 H2AFZ
HIST1H4C 0 2.469442 0.854 0.328 0 2 HIST1H4C
TOP2A 0 2.453865 0.816 0.038 0 2 TOP2A
STMN1 0 2.439747 0.942 0.109 0 2 STMN1
TUBB 0 2.282930 0.949 0.197 0 2 TUBB
MAML31 0 2.641743 0.837 0.248 0 3 MAML3
AC023590.11 0 2.528443 0.986 0.297 0 3 AC023590.1
AC104170.1 0 2.441449 0.823 0.167 0 3 AC104170.1
RAPGEF51 0 2.225608 0.932 0.255 0 3 RAPGEF5
LHFPL21 0 2.153702 0.798 0.247 0 3 LHFPL2
AFF21 0 2.024695 0.929 0.278 0 3 AFF2
FGD61 0 1.984783 0.860 0.246 0 3 FGD6
FYB1 0 2.493123 0.881 0.027 0 4 FYB1
INPP4B 0 2.431992 0.823 0.029 0 4 INPP4B
THEMIS 0 2.035150 0.731 0.005 0 4 THEMIS
PRKCH 0 1.878844 0.885 0.138 0 4 PRKCH
IL7R 0 1.798465 0.680 0.012 0 4 IL7R
TC2N 0 1.739122 0.783 0.042 0 4 TC2N
LEF1 0 1.735432 0.623 0.018 0 4 LEF1
IGHGP1 0 5.711377 0.498 0.151 0 5 IGHGP
IGHG31 0 5.571913 0.696 0.327 0 5 IGHG3
IGLC11 0 5.580136 0.829 0.473 0 5 IGLC1
IGKC1 0 5.655292 0.961 0.918 0 5 IGKC
IGHA11 0 6.011129 0.648 0.436 0 5 IGHA1
IGLC21 0 5.591442 0.894 0.761 0 5 IGLC2
IGLC31 0 5.518655 0.721 0.603 0 5 IGLC3

2.1.1 Dynamic table top 7

#install.packages("htmlwidgets", type = "binary")
#install.packages("DT", type = "binary")

DT::datatable(df_top7)

2.1.2 Dynamic table of all markers

DT::datatable(df_mark)

2.1.3 Dotplot

markerGenes <- unique(tonsil_markers_01$gene)

geneSym <- ifelse(test = !grepl('NA', markerGenes), 
             yes = sub('.*?-', '', markerGenes),
             no = sub('-.*', '', markerGenes))


dot.10 <- DotPlot(tonsil_wnn_bcell, features = unique(top10_tonsil_markers_01$gene),cols = 'RdBu', cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels= geneSym)+ggtitle("res 0.1 top 10 of each cluster")

dot.5 <- DotPlot(tonsil_wnn_bcell, features = unique(top5_tonsil_markers_01$gene),cols = 'RdBu', cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels= geneSym)+ggtitle("res 0.1 top 5 of each cluster")
dot.10 +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 5))

dot.5 +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 7))

3 Markers exploration

top7mark_cluster0<-top7_tonsil_markers_01[["gene"]][1:7]
top7mark_cluster1<-top7_tonsil_markers_01[["gene"]][8:14]
top7mark_cluster2<-top7_tonsil_markers_01[["gene"]][15:21]
top7mark_cluster3<-top7_tonsil_markers_01[["gene"]][22:28]
top7mark_cluster4<-top7_tonsil_markers_01[["gene"]][29:35]
top7mark_cluster5<-top7_tonsil_markers_01[["gene"]][36:42]
markers_gg <- function(x){purrr::map(x, function(x) {
  
  p <- FeaturePlot(
    tonsil_wnn_bcell,
    features = x,
    reduction = "wnn.umap",
    pt.size = 0.1
  )
  p
})}

3.1 Iñaki markers

m<-c("PRDM1","XBP1","IRF4","MEF2B","BCL6")

DZ<-c("SUGCT", "CXCR4", "AICDA")

LZ<- c("CD83","BCL2A1")

GC<- c("MEF2B", "BCL6","IRF4")

PC<- c("PRDM1","SLAMF7", "MZB1", "FKBP11")

3.1.1 Dark Zone

markers_gg(DZ)
## [[1]]

## 
## [[2]]

## 
## [[3]]

3.1.2 Light Zone

markers_gg(LZ)
## [[1]]

## 
## [[2]]

3.1.3 Germinal Center

markers_gg(GC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

3.1.4 Plasma cell

markers_gg(PC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

3.2 Top 7 of each cluster

3.2.1 Cluster 0

markers_gg(top7mark_cluster0)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

naive_markers<-c("CD79A", "CD79B", "BLNK")
memory_markers<-c("CD27")
markers_gg(naive_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(memory_markers)
## [[1]]

markers_gg(c("MS4A1","NT5E"))
## [[1]]

## 
## [[2]]

MS4A1:NAIVE-MEMORY B CELL NT5E: NAIVE B CELL

3.2.2 Cluster 1:

markers_gg(top7mark_cluster1)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.2.3 Cluster 2:

markers_gg(top7mark_cluster2)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.2.4 Cluster 3:

markers_gg(top7mark_cluster3)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

INPP4B: Immune cell enhanced (memory CD4 T-cell)

3.2.5 Cluster 4

AOAH: NK GNLY:NK

markers_gg(top7mark_cluster4)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.2.6 Cluster 5

markers_gg(top7mark_cluster5)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.3 Markers FOXO1, NFKB1

 p <- FeaturePlot(
    tonsil_wnn_bcell,
    features = c("NFKB1","FOXO1"),
    reduction = "wnn.umap",
    pt.size = 0.1
  )
p

4 Mitocondrial

FeaturePlot(
    tonsil_wnn_bcell,
    features = "percent.mt",
    reduction = "wnn.umap",
    pt.size = 0.1
  )

VlnPlot(tonsil_wnn_bcell, features = "percent.mt", group.by = "wsnn_res.0.1", pt.size=0)

VlnPlot(tonsil_wnn_bcell, features = "percent_ribo", group.by = "wsnn_res.0.1", pt.size=0)

5 Number of cell in each cluster

Idents(tonsil_wnn_bcell) <- "wsnn_res.0.05"

cell.num <- table(Idents(tonsil_wnn_bcell))
cell.num
## 
##     0     1     2     3     4 
## 26883  7186  6602  2102  1860

6 Rename clusters

new.cluster.ids <- c("Naive/MBC", "Naive CD4 T-celL","GC/DZ", "GC/LZ", "NK T-cell", "PC", "Monocytes","NI")
names(new.cluster.ids) <- levels(tonsil_wnn_bcell)
tonsil_wnn_bcell <- RenameIdents(tonsil_wnn_bcell, new.cluster.ids)
DimPlot(tonsil_wnn_bcell, reduction = "wnn.umap", label = TRUE, pt.size = 0.5) 

7 Bibliography markers

MARKERS

Immature B cells express CD19, CD 20, CD34, CD38, and CD45R, T-cell receptor/CD3 complex (TCR/CD3 complex)

  • T-cells (identified by high expression of CD3D and CD3E).
  • monocytes (identified by high expression of LYZ and S100A8).
  • naive B-cells (identified by high expression of CD79A, CD79B and BLNK).
  • plasma cells (identified by B-cell and proliferation markers, such as TOP2A or MKI67).
  • poor-quality cells (identified by high mitochondrial expression). If a cell has pores in the membrane due to cell lysis, the cytosolic mRNA will leak out of the cell; but if the diameter of mitochondria is higher than the pores, they will get trapped inside the cell.
canonical_bcell_markers <-c("CD34", "CD38", "CD19")

monocytes_markers<-c("LYZ","S100A8")

naive_markers<-c("CD79A", "CD79B", "BLNK")

bib_Bcell_markers<-c("CD19","CR2","MS4A1","RALGPS2","CD79A")
bib_Tcell_markers<-c("CD3E","CD4","CD8A","FOXP3","IL17A")

7.1 B cells

markers_gg(naive_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(bib_Bcell_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

7.2 T-cells

CD8+ T cell markers:“CD3D”, “CD8A” NK cell markers:“GNLY”, “NKG7”

markers_gg(bib_Tcell_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

8 CellCycleScoring

s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

tonsil_wnn_bcell <- CellCycleScoring(tonsil_wnn_bcell, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
## Warning: The following features are not present in the object: MLF1IP, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: FAM64A, HN1, not
## searching for symbol synonyms
head(tonsil_wnn_bcell[[]])
##                                              lib_name_barcode    orig.ident
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 BCLL_15_T_1_AAACAGCCAGCAACCT-1 SeuratProject
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 BCLL_15_T_1_AAACAGCCAGCTTAGC-1 SeuratProject
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 BCLL_15_T_1_AAACATGCAGGCCAAA-1 SeuratProject
## BCLL_15_T_1_AAACCAACACGAATTT-1 BCLL_15_T_1_AAACCAACACGAATTT-1 SeuratProject
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 BCLL_15_T_1_AAACCGAAGCTATGAC-1 SeuratProject
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 BCLL_15_T_1_AAACCGAAGTAAAGGT-1 SeuratProject
##                                nCount_RNA nFeature_RNA nCount_ATAC
## BCLL_15_T_1_AAACAGCCAGCAACCT-1       2938         1493       14475
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1       5693         2527       14100
## BCLL_15_T_1_AAACATGCAGGCCAAA-1       2377         1121       12678
## BCLL_15_T_1_AAACCAACACGAATTT-1       6476         2543       11978
## BCLL_15_T_1_AAACCGAAGCTATGAC-1       2285         1078       16821
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1       5027         2220       15923
##                                nFeature_ATAC nucleosome_signal
## BCLL_15_T_1_AAACAGCCAGCAACCT-1          6069         0.9178862
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1          5960         0.7073955
## BCLL_15_T_1_AAACATGCAGGCCAAA-1          5233         0.5805921
## BCLL_15_T_1_AAACCAACACGAATTT-1          5077         0.5724638
## BCLL_15_T_1_AAACCGAAGCTATGAC-1          6613         0.5307644
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1          6633         0.6731493
##                                nucleosome_percentile TSS.enrichment
## BCLL_15_T_1_AAACAGCCAGCAACCT-1                  0.88       4.890692
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1                  0.44       3.685808
## BCLL_15_T_1_AAACATGCAGGCCAAA-1                  0.15       5.826994
## BCLL_15_T_1_AAACCAACACGAATTT-1                  0.14       5.295245
## BCLL_15_T_1_AAACCGAAGCTATGAC-1                  0.08       5.239743
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1                  0.35       4.178657
##                                TSS.percentile tss.level percent.mt percent_ribo
## BCLL_15_T_1_AAACAGCCAGCAACCT-1           0.31      High   9.326072     5.616065
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1           0.03      High   3.249605     4.180573
## BCLL_15_T_1_AAACATGCAGGCCAAA-1           0.73      High  13.378208    19.099706
## BCLL_15_T_1_AAACCAACACGAATTT-1           0.50      High   2.424336     2.362569
## BCLL_15_T_1_AAACCGAAGCTATGAC-1           0.48      High  14.529540    15.229759
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1           0.09      High   5.390889     2.048936
##                                nCount_peaks nFeature_peaks library_name
## BCLL_15_T_1_AAACAGCCAGCAACCT-1         7403           6067  BCLL_15_T_1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1         7876           6443  BCLL_15_T_1
## BCLL_15_T_1_AAACATGCAGGCCAAA-1         5989           4857  BCLL_15_T_1
## BCLL_15_T_1_AAACCAACACGAATTT-1         5910           4926  BCLL_15_T_1
## BCLL_15_T_1_AAACCGAAGCTATGAC-1         8042           6239  BCLL_15_T_1
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1         8606           6978  BCLL_15_T_1
##                                 donor_id  sex age   age_group hospital    assay
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACCAACACGAATTT-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 BCLL-15-T male  33 young_adult     CIMA multiome
##                                          barcodes doublet_scores
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 AAACAGCCAGCAACCT-1          0.020
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 AAACAGCCAGCTTAGC-1          0.024
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 AAACATGCAGGCCAAA-1          0.019
## BCLL_15_T_1_AAACCAACACGAATTT-1 AAACCAACACGAATTT-1          0.015
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 AAACCGAAGCTATGAC-1          0.020
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 AAACCGAAGTAAAGGT-1          0.016
##                                predicted_doublets peaks.weight RNA.weight
## BCLL_15_T_1_AAACAGCCAGCAACCT-1              FALSE    0.5213808  0.4786192
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1              FALSE    0.4637829  0.5362171
## BCLL_15_T_1_AAACATGCAGGCCAAA-1              FALSE    0.5030760  0.4969240
## BCLL_15_T_1_AAACCAACACGAATTT-1              FALSE    0.5155517  0.4844483
## BCLL_15_T_1_AAACCGAAGCTATGAC-1              FALSE    0.5918682  0.4081318
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1              FALSE    0.4687815  0.5312185
##                                wsnn_res.0.005 wsnn_res.0.01 seurat_clusters
## BCLL_15_T_1_AAACAGCCAGCAACCT-1              0             0               1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1              2             1               3
## BCLL_15_T_1_AAACATGCAGGCCAAA-1              0             0               0
## BCLL_15_T_1_AAACCAACACGAATTT-1              0             0               6
## BCLL_15_T_1_AAACCGAAGCTATGAC-1              0             0               0
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1              2             1               3
##                                sub.cluster_0.25 sub.cluster0_0.5 is_doublet
## BCLL_15_T_1_AAACAGCCAGCAACCT-1                0              0_4      FALSE
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1                2                2      FALSE
## BCLL_15_T_1_AAACATGCAGGCCAAA-1                0              0_0      FALSE
## BCLL_15_T_1_AAACCAACACGAATTT-1                0              0_3      FALSE
## BCLL_15_T_1_AAACCGAAGCTATGAC-1                0              0_0      FALSE
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1                2                2      FALSE
##                                wsnn_res.0.05 wsnn_res.0.75 wsnn_res.0.075
## BCLL_15_T_1_AAACAGCCAGCAACCT-1             0             1              0
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1             2             4              3
## BCLL_15_T_1_AAACATGCAGGCCAAA-1             0             0              1
## BCLL_15_T_1_AAACCAACACGAATTT-1             0             9              0
## BCLL_15_T_1_AAACCGAAGCTATGAC-1             0             0              1
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1             2             4              3
##                                is_tcell sub.cluster2 wsnn_res.0.1 wsnn_res.0.25
## BCLL_15_T_1_AAACAGCCAGCAACCT-1    FALSE            0            0             1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1    FALSE            3            3             3
## BCLL_15_T_1_AAACATGCAGGCCAAA-1    FALSE            0            1             0
## BCLL_15_T_1_AAACCAACACGAATTT-1    FALSE            0            0             6
## BCLL_15_T_1_AAACCGAAGCTATGAC-1    FALSE            0            1             0
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1    FALSE            3            3             3
##                                     S.Score   G2M.Score Phase old.ident
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 -0.032117648 -0.11812569    G1         0
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 -0.007383767 -0.18545173    G1         2
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 -0.034289719 -0.08897424    G1         0
## BCLL_15_T_1_AAACCAACACGAATTT-1 -0.134934431 -0.14585202    G1         0
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 -0.106464784 -0.13573257    G1         0
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 -0.143077150 -0.12467942    G1         2
print(x = tonsil_wnn_bcell[["pca"]], 
      dims = 1:10, 
      nfeatures = 5)
## PC_ 1 
## Positive:  MKI67, TOP2A, TPX2, HMGB2, NUSAP1 
## Negative:  POLD3, POLA1, MCM5, CCNE2, G2E3 
## PC_ 2 
## Positive:  MCM4, CLSPN, HELLS, DTL, PCNA 
## Negative:  GAS2L3, AURKA, CENPE, HMMR, CDC20 
## PC_ 3 
## Positive:  ANLN, E2F8, RRM2, CDC25C, NDC80 
## Negative:  CDC20, CCNB2, CKS2, CKS1B, NEK2 
## PC_ 4 
## Positive:  TYMS, FEN1, CKS1B, E2F8, RRM2 
## Negative:  G2E3, GAS2L3, DTL, POLA1, LBR 
## PC_ 5 
## Positive:  LBR, G2E3, CBX5, SLBP, NCAPD2 
## Negative:  POLD3, EXO1, CDC6, GAS2L3, DTL 
## PC_ 6 
## Positive:  LBR, CCNE2, SLBP, WDR76, CDCA7 
## Negative:  G2E3, MCM5, NASP, MCM2, POLD3 
## PC_ 7 
## Positive:  POLD3, G2E3, ANP32E, NCAPD2, LBR 
## Negative:  MCM5, GAS2L3, MCM2, GINS2, KIF20B 
## PC_ 8 
## Positive:  G2E3, CCNE2, DSCC1, CDC6, NEK2 
## Negative:  POLD3, MCM5, CKAP5, KIF20B, CBX5 
## PC_ 9 
## Positive:  POLA1, ANP32E, NASP, DTL, TMPO 
## Negative:  SLBP, CENPA, POLD3, CCNE2, MCM5 
## PC_ 10 
## Positive:  CKAP2, TMPO, NCAPD2, MCM5, DSCC1 
## Negative:  NASP, ANP32E, KIF20B, CKAP5, SLBP

PCNA: Proliferating cell nuclear antigen

# Visualize the distribution of cell cycle markers across
RidgePlot(tonsil_wnn_bcell, features = c("PCNA", "TOP2A", "MCM6", "MKI67"), ncol = 2)
## Picking joint bandwidth of 0.0824
## Picking joint bandwidth of 0.0756
## Picking joint bandwidth of 0.0616
## Picking joint bandwidth of 0.072

tonsil_wnn_bcell <- RunPCA(tonsil_wnn_bcell, features = c(s.genes, g2m.genes))
## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 19 features requested have not been scaled (running reduction without
## them): UNG, PRIM1, UHRF1, MLF1IP, RFC2, RPA2, UBR7, MSH2, RAD51, TIPIN, BLM,
## CASP8AP2, USP1, CHAF1B, FAM64A, HN1, RANGAP1, PSRC1, CTCF
## Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large
## a percentage of total singular values, use a standard svd instead.
## PC_ 1 
## Positive:  MKI67, TOP2A, TPX2, HMGB2, NUSAP1, CENPF, CDK1, CENPE, AURKB, GTSE1 
##     ANLN, NDC80, TUBB4B, BUB1, KIF11, HMMR, BIRC5, DLGAP5, SMC4, UBE2C 
##     RRM2, CDCA2, NUF2, ECT2, CDCA3, KIF23, CDCA8, KIF2C, CKAP2L, CCNB2 
## Negative:  POLD3, POLA1, MCM5, CCNE2, G2E3, SLBP, CDC6, MCM2, EXO1, MCM6 
##     NASP, DTL, GINS2, DSCC1, GAS2L3, CDC45, CENPA, HELLS, ATAD2, CKAP2 
##     WDR76, TYMS, NEK2, LBR, CBX5, GMNN, MCM4, FEN1, RAD51AP1, TMPO 
## PC_ 2 
## Positive:  MCM4, CLSPN, HELLS, DTL, PCNA, CDC45, GINS2, CDC6, MCM6, WDR76 
##     BRIP1, ATAD2, POLA1, EXO1, CCNE2, FEN1, SLBP, MCM5, MCM2, RRM2 
##     E2F8, GMNN, DSCC1, POLD3, RRM1, NASP, TYMS, CDCA7, RAD51AP1, CBX5 
## Negative:  GAS2L3, AURKA, CENPE, HMMR, CDC20, UBE2C, NEK2, DLGAP5, CENPF, CENPA 
##     KIF23, CCNB2, CDCA8, TPX2, BUB1, TOP2A, CDCA3, TTK, HJURP, G2E3 
##     CKAP2L, GTSE1, CKS2, CKAP2, CDC25C, ECT2, NUF2, CKAP5, KIF2C, TUBB4B 
## PC_ 3 
## Positive:  ANLN, E2F8, RRM2, CDC25C, NDC80, RAD51AP1, KIF11, ECT2, TMPO, BRIP1 
##     HJURP, CKAP2L, CDCA2, KIF23, EXO1, G2E3, GTSE1, TTK, CDK1, DSCC1 
##     ATAD2, BUB1, CKAP5, MKI67, TYMS, KIF20B, SMC4, POLA1, RRM1, NUSAP1 
## Negative:  CDC20, CCNB2, CKS2, CKS1B, NEK2, BIRC5, GINS2, HMGB2, TUBB4B, DTL 
##     NASP, MCM2, ANP32E, MCM5, MCM4, MCM6, SLBP, CENPF, AURKA, CDCA7 
##     UBE2C, CDC6, CDCA3, GMNN, HMMR, PCNA, CBX5, CDC45, LBR, CENPA 
## PC_ 4 
## Positive:  TYMS, FEN1, CKS1B, E2F8, RRM2, RRM1, AURKB, PCNA, CDCA3, TUBB4B 
##     ANP32E, CKS2, RAD51AP1, UBE2C, HMGB2, BIRC5, MCM2, NDC80, MKI67, GTSE1 
##     CDK1, DSCC1, TOP2A, KIF2C, NCAPD2, HJURP, NUSAP1, GMNN, CDCA8, CKAP2L 
## Negative:  G2E3, GAS2L3, DTL, POLA1, LBR, KIF20B, CDCA7, BRIP1, CENPA, NEK2 
##     POLD3, HELLS, MCM6, CKAP2, WDR76, CKAP5, CDC45, EXO1, CDCA2, ECT2 
##     CDC6, TTK, CENPE, AURKA, CCNB2, SMC4, HMMR, ATAD2, CENPF, CCNE2 
## PC_ 5 
## Positive:  LBR, G2E3, CBX5, SLBP, NCAPD2, CDCA7, WDR76, NASP, ANP32E, TMPO 
##     SMC4, CKAP5, CKS2, TACC3, HMGB2, MCM5, KIF11, HELLS, POLA1, NUSAP1 
##     RRM1, MCM2, ATAD2, E2F8, MKI67, RRM2, TPX2, BRIP1, NUF2, KIF20B 
## Negative:  POLD3, EXO1, CDC6, GAS2L3, DTL, NEK2, CENPA, CDC45, CKAP2, CCNE2 
##     AURKA, RAD51AP1, TTK, CDC20, MCM4, HMMR, CDK1, KIF2C, DLGAP5, GINS2 
##     ECT2, CLSPN, MCM6, CENPE, HJURP, UBE2C, CDC25C, KIF23, NDC80, CDCA8
tonsil_wnn_bcell <- RunUMAP(object = tonsil_wnn_bcell,
  nn.name = "weighted.nn",
  reduction.name = "wnn.umap",
  reduction.key = "wnnUMAP_" )
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 14:45:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 14:45:05 Commencing smooth kNN distance calibration using 1 thread
## 14:45:08 Initializing from normalized Laplacian + noise
## 14:45:10 Commencing optimization for 200 epochs, with 1412680 positive edges
## 14:45:44 Optimization finished
DimPlot(tonsil_wnn_bcell,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T, split.by = "age_group")

DimPlot(tonsil_wnn_bcell,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T)

tonsil_wnn_bcell@meta.data$Phase<-as.character(tonsil_wnn_bcell@meta.data$Phase.y)
Idents(tonsil_wnn_bcell) <- "Phase.y"

DimPlot(tonsil_wnn_bcell,reduction = "wnn.umap")
tonsil_wnn_bcell@meta.data<- tonsil_wnn_bcell@meta.data %>% relocate("old.ident", .after = last_col())
DimPlot(tonsil_wnn_bcell,reduction = "wnn.umap", group.by = "Phase.y", cols = c("red","blue","yellow"))